###################################################
####                Script RMQS                ####
####         - Variance partitioning -         ####
####               - by phylum -               ####
####  - B. Karimi & N. Chemidlin (11042016) -  ####
###################################################

##################### Description of the script ##################### 

#### Packages --------------
library(dplyr) # transfo matrix
library(MASS)
library(rgrs) # fwd selection
library(splancs)
library(vegan) # fwd selection
library(mgcv) #
library(ade4) # rda
library(psych) # fwd selection
library(spdep) # PCNM
library(leaps) # rda
library(grDevices) # plot
library(onLoad)
library(scatterplot3d) # plot
library(ggplot2) # plot
library(plyr) # plot
library(scales) # plot
library(stringr) # expression
library(Cairo)
library(grep) # sub fct (change word), need version up to 3.2
library(doBy) # order by more than one variable
library(RcolorBrewer) # palette de couleur

#### Names of objects in the script --------------
# This glossary is documentation only. Every entry is written as a comment:
# a bare object name (or a `--------` separator) is executable R and would be
# evaluated, and fail, when the script is sourced before the objects exist.
#
# env                   Table with environmental data (explanatory variables)
# bio                   Table with biological parameters (variables to explain)
# taxon                 Name of the studied parameter in the considered analysis
# bio$taxon             The studied variable
# varcor                Correlation between environmental variables
# pca_env               Principal Component Analysis between environmental variables
# --------
# coords                Coordinates
# dnnR                  Neighbourhood relationships
# dnnSIR                Neighbourhood relationships without isolated points
# listSIR               List of neighbours
# gdi                   Distance matrix
# pcnm_distance         Spatial descriptors
# pcnm.sel              Table of descriptors significant in the sense of Moran
# --------
# env.var               List of names of env. var. kept in the analysis after step0
# env.var.exp           Table of env. var. kept in the analysis after step0
# env.var.qual          List of qualitative env. var. kept in the analysis after step0
# env.var.exp.std       Table of env. var. kept in the analysis after step0 and standardized
# bio$taxon.std         Biological variable standardized
# model_test            Model of selection
# --------
# env.var2              List of names of env. var. kept in the analysis after step1
# env.var.exp2          Table of env. var. kept in the analysis after step1
# env.var.qual2         List of qualitative env. var. kept in the analysis after step1
# env.var.num.std2      Table of quantitative env. var. kept in the analysis after step1 and standardized
# env.var.exp.std2      Table of env. var. kept in the analysis after step1 and standardized
# bio$taxon.std         Biological variable standardized
# model.rda.init        Initial partitioning model
# model.rda.term        Terminal partitioning model
# mod.fwdsel            Model built with the "forward" method
# mod.bthsel            Model built with the "both" method
# var.sel               List of variables involved in the rda/pls model
# mod.par               The most parsimonious model
# test.mod              Variance analysis of the global model
# output.mod            Results of the variance analysis
# test.mod.pur          Estimation of variance explained by each variable (pure effects)
# output.mod.pur        Results of the variance analysis on pure effects
# coeff                 Direction of effects of the environmental variables
# --------
# resid                 Residuals of the previous model
# pcnm.rda.init         Initial partitioning model with PCNM
# pcnm.rda.term         Terminal partitioning model with PCNM
# pcnm.fwdsel           PCNM model built with the "forward" method
# pcnm.sel.used         List of spatial descriptors kept in the PCNM model
# pcnm.par              The most parsimonious PCNM model
# test.pcnm             Variance analysis of the global PCNM model
# output.pcnm           Results of the variance analysis
# tot_exp               Addition of environmental parameters and spatial descriptors
# output.net            Results of the complete variance analysis with global effect
# output.net.pur        Summary of all pure effects
  

#### Importing data and selection of metropolitan sites --------------
setwd('/Users/Battle/Documents/5- Post-doc/Biocom/4-RMQS/Data/Data used/')

# Sample identifiers, sorted by site code
id <- read.table("id.ech.RMQS.txt", header = TRUE, sep = "")
id <- id[order(id$code_site), ]

# Phylum-level table, sorted by site code
phylum <- read.table("Taxonomy_phylum.txt", header = TRUE, stringsAsFactors = TRUE,
                     na.strings = "NA", skip = 1)
phylum <- phylum[order(phylum$code_site), ]

# Class-level table (the Proteobacteria classes are extracted from it later)
class <- read.table("Taxonomy_class.txt", header = TRUE, stringsAsFactors = TRUE,
                    na.strings = "NA", skip = 1)
class <- class[order(class$code_site), ]

# Environmental covariables, sorted by site code (original note: 2147)
cov <- read.csv2("covariable RMQS.csv", header = TRUE, skip = 1, na.strings = "NA",
                 sep = ";", dec = ",")
cov <- cov[order(cov$code_site), ]

# Remove the sites of departments 2A and 2B (Corsica) to keep metropolitan sites
sitecorse <- cov$code_site[cov$code_dept == "2A" | cov$code_dept == "2B"]
covmetro <- cov[!is.element(cov$code_site, sitecorse), ]                 # original note: 2115

# Keep only the sites shared by the biological tables and the covariables
phylmetro <- phylum[phylum$code_site %in% covmetro$code_site, ]          # original note: 1800
classmetro <- class[class$code_site %in% covmetro$code_site, ]           # original note: 1800
covmetro <- covmetro[covmetro$code_site %in% phylmetro$code_site, ]      # original note: 1800
covmetro <- covmetro[order(covmetro$code_site), ]

# Names of the variables kept for the analysis
varint <- c(
  "x_reel", "y_reel", "Elevation",                                   # topography
  "Clay", "Silt", "Sand",                                            # soil structure
  "pH", "CEC", "C_N", "Organic_Carbon", "Nitrogen", "CaCO30",
  "K_tot", "P_ass",                                                  # soil nutrients
  "Cd_tot", "Cu_tot", "Ni_tot", "Pb_tot", "Zn_tot",                  # soil metals
  "code_occup",                                                      # land use
  "ETP_mean", "Rain_mean", "Tem_mean"                                # climate
)

# Columns 1, 5 and 6 are taken as the site code and the two coordinates
# (selected by position -- verify against the input file layout)
cov.coord <- covmetro[, c(1, 5, 6)]
coords <- cov.coord[, c(2, 3)]

# Neighbours lying between 15000 and 23000 distance units of each site
dnnR <- dnearneigh(as.matrix(coords), d1 = 15000, d2 = 23000)

# Sites without any neighbour are removed from the covariable table
coords.IP <- cov.coord[card(dnnR) == 0, ]
covmetro <- covmetro[!(cov.coord$code_site %in% coords.IP$code_site), ]

# Restrict the covariables to the variables of interest
covmetro2 <- covmetro[, colnames(covmetro) %in% varint]

# Nomenclature table: one row per variable, giving its name in the data, the
# legend used on plots (French labels, used verbatim) and the colour of its
# thematic group. The three vectors must stay aligned position by position.
var.names <- c("x_reel", "y_reel", "Elevation",
               "Clay", "Silt", "Sand",
               "pH", "TpH", "CEC", "C_N", "Organic_Carbon", "Nitrogen", "CaCO30", "K_tot", "P_ass",
               # "Pb_tot" used to carry a stray diaeresis, so it never matched the data column
               "Cd_tot", "Cu_tot", "Ni_tot", "Pb_tot", "Zn_tot",
               "ETP_mean", "Rain_mean", "Tem_mean",
               "code_occup", "code_occup1", "code_occup2", "code_occup3", "code_occup4")
legends <- c("Longitude", "Latitude", "Altitude",
             "Teneur en Argile", "Teneur en Limon", "Teneur en Sable",
             "pH", "Concentration en protons", "Cations échangeables", "Ratio Carbone/Azote", "Carbone organique", "Azote", "Carbonates", "Potassium", "Phosphore",
             "Cadmium", "Cuivre", "Nickel", "Plomb", "Zinc",
             "Evapotranspiration", "Précipitation", "Température",
             "Mode d'usage des sols", "Cultures", "Prairies", "Forêts", "Vignes/Vergers")
# One colour per group, repeated for each member of the group. An earlier
# palette that was assigned and immediately overwritten has been removed.
col.leg <- c(rep("yellow1", 3),       # coordinates and elevation
             rep("darkorange", 12),   # soil structure and chemistry
             rep("purple1", 5),       # metals
             rep("deepskyblue", 3),   # climate
             rep("limegreen", 5))     # land use
stopifnot(length(var.names) == length(legends),
          length(var.names) == length(col.leg))
nomenc <- cbind(var.names, legends, col.leg)

# Proteobacteria classes taken from the class-level table
selection <- c("Alphaproteobacteria", "Betaproteobacteria", "Deltaproteobacteria",
               "Epsilonproteobacteria", "Gammaproteobacteria")
proteometro <- classmetro[, selection]

# ppm = phylum table with the Proteobacteria classes appended as extra columns
ppm <- cbind(phylmetro, proteometro)

# Abundance columns (4 onward) for the sites that are not isolated points
ppm.tmp <- ppm[!(ppm$code_site %in% coords.IP$code_site), 4:ncol(ppm)]

# Keep the three leading columns plus every group whose column sum is positive
ppm <- cbind(
  ppm[!(ppm$code_site %in% coords.IP$code_site), 1:3],
  ppm.tmp[, colSums(as.matrix(ppm.tmp)) > 0]
)

# Sort by site code
ppm <- ppm[order(ppm$code_site), ]

### Save the prepared tables, then reload the working copies
dir_partition_data <- "/Users/Battle/Documents/5- Post-doc/Biocom/4-RMQS/Partition de variance/Data used"
write.csv(nomenc, file.path(dir_partition_data, "nomenclature.csv"))
write.csv(ppm, file.path(dir_partition_data, "Taxonomy_phylum-classe.metropol.csv"))
write.csv(covmetro2, file.path(dir_partition_data, "covariable RMQS.metropol.csv"))

setwd('/Users/Battle/Documents/5- Post-doc/Biocom/4-RMQS/Data/Data used/')
nomenc <- read.csv2(file.path(dir_partition_data, "nomenclature.csv"), sep = ",", row.names = 1)
# NOTE(review): the two reads below are relative to the working directory set
# just above, which is not the directory the files were written to. They may
# target separately prepared copies -- confirm which files are intended.
ppm <- read.csv2("Taxonomy_phylum-classe.metropol.csv", header = TRUE, sep = ",", row.names = 1)
# NOTE(review): no `sep` is given here, so read.csv2() uses its default ";",
# whereas write.csv() produces comma-separated files -- confirm the file format.
covmetro <- read.csv2("covariable RMQS.metropol.csv", header = TRUE, dec = ".", row.names = 1)


# `env` is the generic name of the environmental table within the script;
# TpH is derived from pH as 10^(-pH)
env <- as.data.frame(covmetro)
env$TpH <- 10^(-env$pH)

# `bio` is the generic name of the biological table within the script
bio <- ppm

#### Choosing the studied variable and creating the results folder --------------
# The studied variable is column 7 of `bio`; its column name labels the outputs
taxon <- colnames(bio)[7]
bio$taxon <- bio[, 7]

# Results go to <CHEMIN>/<taxon>2, which also becomes the working directory
CHEMIN <- '/Users/Battle/Documents/5- Post-doc/Biocom/4-RMQS/Partition de Variance/Resultats'
liste_chemins <- path.expand(paste(CHEMIN, paste0(taxon, "2"), sep = "/"))
dir.create(liste_chemins)
setwd(liste_chemins)

##################### STEP 0: Descriptive analysis of the studied variable and the environmental variables #####################

# Columns 20 to 25 are dropped before the correlation analyses.
# NOTE(review): the original comment said that column 19 is the land-use factor,
# which is kept out of the correlation circles -- confirm the column positions.
envpartiel <- env[, -c(20:25)]

#### Scatter plot of the studied variable against each environmental variable
pdf(file.path(liste_chemins, "lm.fit.pdf"))
for (col_idx in seq_along(env)) {
  plot(bio$taxon ~ env[, col_idx], xlab = colnames(env)[col_idx], ylab = taxon)
}
dev.off()

#### Correlation calculation ####
# Pearson correlations and their p-values (numeric data only)
varcor <- corr.test(envpartiel, method = "pearson")
write.table(varcor$r, file = "Matrice de correlation.txt")
write.table(varcor$p, file = "Matrice de correlation_Pvalue.txt")

# Scatter-plot matrix, drawn on the current device (before the PDF is opened)
pairs(envpartiel, upper.panel = NULL)

# One scatter plot per pair of environmental variables
pdf(file.path(liste_chemins, "env.correlation.pdf"))
n_env <- ncol(envpartiel)
for (x_idx in seq_len(n_env - 1)) {
  for (y_idx in (x_idx + 1):n_env) {
    plot(envpartiel[, y_idx] ~ envpartiel[, x_idx],
         xlab = colnames(envpartiel)[x_idx], ylab = colnames(envpartiel)[y_idx])
  }
}
dev.off()

#### Correlation circle with PCA ####
pca_env <- dudi.pca(envpartiel, center = TRUE, scale = TRUE, nf = 5, scannf = FALSE)
pdf(file.path(liste_chemins, "correlation.circle.pdf"))
#scatter.dudi(pca_env, cex=0.5, box=FALSE)
s.corcircle(pca_env$co, 1, 2)
dev.off()

##################### STEP 1: Principal Coordinates of neighbour matrices #####################
# Coordinates of the common sites (original note: 1800)
coords <- cov.coord[, c(2, 3)]

#### Neighbourhood relationship: Queen relation at rank 1
# Neighbours between 15000 and 23000 distance units (original note: 23 km on the diagonal)
dnnR <- dnearneigh(as.matrix(coords), d1 = 15000, d2 = 23000)

#### Drop the isolated points, then rebuild the neighbourhood without them
coords.SIR <- coords[card(dnnR) != 0, ]
dnnSIR <- dnearneigh(as.matrix(coords.SIR), d1 = 15000, d2 = 23000)
listSIR <- nb2listw(dnnSIR)   # list of neighbours

### Spatial descriptors
# Euclidean distance matrix between the retained sites
gdi <- as.matrix(dist(coords.SIR))
# PCNM on the distance matrix, with the same 23000 threshold as the neighbourhood search
pcnm_distance <- pcnm(gdi, thresh = 23000)
summary(pcnm_distance)

# Plot the neighbourhood graph and save it as a JPEG
x11(type = "cairo")
plot(dnnSIR, coords.SIR, cex = 0.5)
savePlot(filename = "/Users/Battle/Documents/5- Post-doc/Biocom/4-RMQS/Partition de Variance/Resultats/env global/pcnm/PCNMs", type = "jpeg")
dev.off()

#### Moran's I test on every PCNM eigenvector, to find those that are significantly
#### spatially structured (according to Dray, Legendre and Peres-Neto 2006).
# It needs the neighbourhood relationship between sites (listSIR).

# Folder shared by all the PCNM outputs of this section
pcnm_dir <- "/Users/Battle/Documents/5- Post-doc/Biocom/4-RMQS/Partition de Variance/Resultats/env global/pcnm"

# One row per eigenvector: index, Moran's I and permutation p-value (1000 simulations).
# The result matrix is allocated once instead of being grown with rbind() in the loop.
dimension <- dim(pcnm_distance$vectors)
tmp <- matrix(NA_real_, nrow = dimension[2], ncol = 3,
              dimnames = list(NULL, c('no_PCNM', 'Moran_I', 'p.value')))
for (k in seq_len(dimension[2])) {
  test_moran <- moran.mc(pcnm_distance$vectors[, k], listSIR, 1000)
  print(k)
  tmp[k, ] <- c(k, test_moran$statistic, test_moran$p.value)
}
write.csv(tmp, file.path(pcnm_dir, "PCNM_Moran_test.csv"))
# fix(tmp)

# Keep the eigenvectors whose Moran's I is above the mean and whose p-value is below 0.001
kk <- tmp[tmp[, 'Moran_I'] > mean(tmp[, 'Moran_I']) & tmp[, 'p.value'] < 0.001, 'no_PCNM']
# drop = FALSE keeps a one-column table when a single eigenvector is selected
pcnm.sel <- as.data.frame(pcnm_distance$vectors[, kk, drop = FALSE])
dim(pcnm.sel)
colnames(pcnm.sel) <- c(kk)

# Save the selected descriptors and read them back from the SAME file. The
# read used to point to a different folder ("0-Env global") than the one
# written here and read again in STEP 4 ("env global").
write.table(pcnm.sel, file = file.path(pcnm_dir, "PCNM_coords.txt"), append = FALSE)
pcnm.sel <- read.table(file = file.path(pcnm_dir, "PCNM_coords.txt"), dec = ".")

##################### STEP 2: Pre-selection of variables ##################### 

# source("/Users/Battle/Documents/5- Post-doc/Biocom/4-RMQS/Partition de Variance/Script/ISV.fct.R")
# x<-sel.data(env)
#colnames(env)
#working.var<-paste("'", colnames(env)[1], sep="")
#for(i in 2:ncol(env)){
#  working.var<-paste(working.var, "' , '", colnames(env)[i], sep="")
#}
#working.var<-paste(working.var, "'", sep="")

#### Variables retained after the descriptive analysis (STEP 0)
# Commented entries are candidates that are currently switched off;
# uncomment a line to put the variable back into the analysis.
env.var <- c(
  "x_reel",
  "y_reel",
  "Elevation",
  "Clay",
  "Silt",
  # "Sand",
  "pH",
  # "TpH",
  # "CEC",
  "C_N",
  "Organic_Carbon",
  # "Nitrogen",
  # "CaCO30",
  "K_tot",
  "P_ass",
  # "Cd_tot",
  # "Cu_tot",
  # "Ni_tot",
  # "Pb_tot",
  # "Zn_tot",
  # "ETP_mean",
  # "Rain_mean",
  "Tem_mean"
)

#### Standardizing the quantitative data
# Centre and scale the retained environmental variables and the studied variable
env.var.std <- as.data.frame(scale(envpartiel[env.var], center = TRUE, scale = TRUE))
bio$taxon.std <- scale(bio$taxon, center = TRUE, scale = TRUE)

#### Addition of qualitative data
# Land-use factor: values of code_occup1 that are >= 5 are pooled into level "5".
# NOTE(review): `code_occup1` is expected in the reloaded covariable file; it is
# not among the variables listed in `varint` -- confirm the input columns.
env$code_occup <- as.factor(ifelse(env$code_occup1 >= 5, "5", env$code_occup1))
# Treatment contrasts with the 5th level as reference
# (original note: compare on the basis of the least informative level)
contrasts(env$code_occup) <- contr.treatment(levels(env$code_occup), base = 5)

# Explanatory table = standardized quantitative variables + land-use factor
env.var.exp <- cbind(env.var.std, env$code_occup)
colnames(env.var.exp) <- c(env.var, "code_occup")

#### Selection of variables with regsubsets function (package leaps)
model_test <- regsubsets(bio$taxon.std ~ ., data = env.var.exp,
                         method = "exhaustive", nvmax = length(env.var))
pdf(file.path(liste_chemins, "regsubsets.pdf"))
plot(model_test, scale = "bic", main = paste0(taxon, "_regsubsets_BIC"))
plot(model_test, scale = "adjr2", main = paste0(taxon, "_regsubsets_R2 Ajuste"))
dev.off()

#### Variables that are significant in the 5 or 6 best models
# Commented entries are candidates that are currently switched off;
# uncomment a line to put the variable back into the analysis.
env.var2 <- c(
  # "x_reel",
  "y_reel",
  # "Elevation",
  "Clay",
  # "Silt",
  # "Sand",
  "pH",
  # "TpH",
  # "CEC",
  # "C_N",
  # "Organic_Carbon",
  # "Nitrogen",
  # "CaCO30",
  # "K_tot",
  "P_ass",
  # "Cd_tot",
  # "Cu_tot",
  # "Ni_tot",
  # "Pb_tot",
  # "Zn_tot",
  # "ETP_mean",
  # "Rain_mean",
  "Tem_mean"
)

# Standardized table of the variables kept in env.var2, plus the land-use factor
env.var.std2 <- as.data.frame(scale(envpartiel[env.var2], center = TRUE, scale = TRUE))
env.var.exp2 <- cbind(env.var.std2, env$code_occup)
colnames(env.var.exp2) <- c(env.var2, "code_occup")
env.var.qual2 <- "code_occup"

# Studied variable, centred and scaled
bio$taxon.std <- scale(bio$taxon, center = TRUE, scale = TRUE)

##################### STEP 3: Identification of the best model and Variance Partitioning #####################

#### Model bounding
# Start model (intercept only) and scope (every variable of env.var.exp2)
# handed to ordiR2step() below.
model.rda.init <- rda(bio$taxon.std ~ 1, env.var.exp2)
fmla <- as.formula(paste("bio$taxon.std ~ ", paste(colnames(env.var.exp2), collapse = " + ")))
model.rda.term <- rda(fmla, env.var.exp2)

#### Identifying the most parsimonious model
mod.fwdsel <- ordiR2step(model.rda.init, model.rda.term, direction = "forward", Pin = 0.1, Pout = 0.2, pstep = 200, perm.max = 10000, steps = 500, trace = TRUE)
mod.fwdsel$anova
mod.bthsel <- ordiR2step(model.rda.init, model.rda.term, direction = "both", Pin = 0.1, Pout = 0.2, pstep = 200, perm.max = 10000, steps = 500, trace = TRUE)
mod.bthsel$anova

#### Estimation of the explained variance given autocorrelations
# Refit an RDA on the terms retained by the forward selection
var.sel <- attr(mod.fwdsel$terms, 'term.labels')
fmla <- as.formula(paste("bio$taxon.std~", paste(var.sel, collapse = "+")))

#### RDA for the most parsimonious model
mod.par <- rda(fmla, env.var.exp2)

#### Variance Partitioning
# Total variance explained: row 1 is the model, row 2 the residual
test.mod <- anova.cca(mod.par, alpha = 0.05, beta = 0.01, step = 500, perm.max = 10000)
output.mod <- as.data.frame(test.mod)
output.mod$variance_tot <- sum(output.mod$Variance)
output.mod$variance_exp <- output.mod$Variance / output.mod$variance_tot * 100
print(output.mod)

# Variance explained by each explanatory variable (pure/marginal effects)
test.mod.pur <- anova.cca(mod.par, alpha = 0.05, beta = 0.01, step = 500, perm.max = 10000, by = 'margin')
output.mod.pur <- as.data.frame(test.mod.pur)
output.mod.pur$variance_tot <- output.mod$variance_tot[1]
output.mod.pur$variance_exp <- output.mod.pur$Variance / output.mod.pur$variance_tot * 100

# Insert an "interactions" row just before the residual row. It holds the part
# of the model variance that is not accounted for by the pure effects.
# Rows are addressed with seq_len(n - 1): the former `1:n-1` parses as
# `(1:n) - 1` and only worked because index 0 is silently dropped.
n_rows_pur <- nrow(output.mod.pur)
term_rows <- seq_len(n_rows_pur - 1)          # every row except the residual
temp <- c(rownames(output.mod.pur)[term_rows],
          "interactions", rownames(output.mod.pur)[n_rows_pur])
# Placeholders for every column but the last (variance_exp). The "NA" strings
# turn the table columns into character, hence the as.numeric() calls later on.
interactions <- c(rep("NA", ncol(output.mod.pur) - 1),
                  output.mod$variance_exp[1] - sum(output.mod.pur$variance_exp[term_rows]))
output.mod.pur <- rbind(output.mod.pur, interactions)
# Swap the last two rows so that the residual comes last
last_two <- c(nrow(output.mod.pur) - 1, nrow(output.mod.pur))
output.mod.pur[last_two, ] <- output.mod.pur[rev(last_two), ]
rownames(output.mod.pur) <- temp

# Direction of effects
coeff <- as.data.frame(coef(mod.par))
colnames(coeff) <- c("coeff.norm")


# Saving residuals for more analyses
resid <- as.data.frame(residuals(mod.par))
colnames(resid) <- c("resid")
residu <- as.vector(resid$resid)
write.csv(residu, file = paste(liste_chemins, paste(taxon, "Residuals.txt", sep = "."), sep = "/"))

##################### STEP 4: Principal Coordinates of neighbour matrices #####################
# Reload the selected spatial descriptors and the residuals saved in STEP 3
pcnm.sel <- read.table(file = "/Users/Battle/Documents/5- Post-doc/Biocom/4-RMQS/Partition de Variance/Resultats/env global/pcnm/PCNM_coords.txt", dec = ".")
residu <- read.table(file = paste(liste_chemins, paste(taxon, "Residuals.txt", sep = "."), sep = "/"), dec = ".", sep = ",", header = TRUE, row.names = 1)

pcnm.sel2 <- as.data.frame(pcnm.sel)
residu <- as.vector(residu$x)

#### Model bounding
# Start model (intercept only) and scope (every selected PCNM) for ordiR2step()
pcnm.rda.init <- rda(residu ~ 1, data = pcnm.sel2)
fmla <- as.formula(paste("residu~", paste(colnames(pcnm.sel), collapse = " + ")))
pcnm.rda.term <- rda(fmla, data = pcnm.sel)

#### Identifying the most parsimonious model
pcnm.fwdsel <- ordiR2step(pcnm.rda.init, pcnm.rda.term, direction = "forward", Pin = 0.05, Pout = 0.2, pstep = 200, perm.max = 10000, steps = 500, trace = TRUE)
pcnm.fwdsel$anova

# Column 1 of the selection table is treated as a cumulative value; R2.ind is
# the increment brought by each row (the first row keeps its own value).
# diff() replaces a `2:nrow()` loop that ran backwards on a one-row table.
r2_cumul <- pcnm.fwdsel$anova[, 1]
pcnm.fwdsel$anova$R2.ind <- c(r2_cumul[1], diff(r2_cumul))

# Label every row but the last with the name of the selected descriptor.
# seq_len(n - 1) replaces `1:n-1`, which parses as `(1:n) - 1`.
pcnm.sel.used <- attr(pcnm.fwdsel$terms, 'term.labels')
rownames(pcnm.fwdsel$anova)[seq_len(nrow(pcnm.fwdsel$anova) - 1)] <- pcnm.sel.used

pcnm.fwdsel$anova$variance_exp <- pcnm.fwdsel$anova$R2.ind * 100
# Coefficients of the selected descriptors; the last row has none ("NA" string)
pcnm.fwdsel$anova$coeff <- c(coef(pcnm.fwdsel), "NA")

#### Estimation of the explained variance given autocorrelations
fmla <- as.formula(paste("residu~", paste(as.character(pcnm.sel.used), collapse = "+")))
#### RDA for the most parsimonious model
pcnm.par <- rda(fmla, pcnm.sel)
coeff.pcnm <- as.data.frame(coef(pcnm.par))

#### Variance Partitioning
# Total variance explained: row 1 is the model, row 2 the residual
test.pcnm <- anova.cca(pcnm.par, alpha = 0.05, beta = 0.01, step = 500, perm.max = 10000)
output.pcnm <- as.data.frame(test.pcnm)
output.pcnm$variance_tot <- sum(output.pcnm$Variance)
output.pcnm$variance_exp <- output.pcnm$Variance / output.pcnm$variance_tot * 100
print(output.pcnm)

##################### STEP 5: Compilation of results #####################
#### !!!!!!!! If PCNM succeeded
#### Addition to first analysis
# Global model: environmental model, spatial model fitted on its residuals,
# remaining residual, and a placeholder row for the total explained.
tot_exp <- rep("<NA>", ncol(output.mod))
output.net <- rbind(output.mod[1, ], output.pcnm[1, ], output.pcnm[2, ], tot_exp)
rownames(output.net) <- c("Envir.parameters", "Spatial.descriptor", "Residual", "Total.explained")

# The PCNM percentages are relative to the residual of the environmental
# model, so they are rescaled by that residual share.
inter_exp <- as.numeric(output.mod.pur$variance_exp[rownames(output.mod.pur) == "interactions"])
env_exp <- output.mod$variance_exp[1]
env_resid_exp <- output.mod$variance_exp[2]
spatial_net <- (output.pcnm$variance_exp[1] * env_resid_exp) / 100
resid_net <- (output.pcnm$variance_exp[2] * env_resid_exp) / 100

# A positive interaction leaves the shares as they are. Otherwise it is
# subtracted from the environmental share and added to the residual. An
# interaction of exactly 0 used to match neither test, leaving `variance_net`
# undefined; it now takes the second branch, which gives the same numbers.
if (inter_exp > 0) {
  output.net$variance_net <- c(env_exp,
                               spatial_net,
                               resid_net,
                               env_exp + spatial_net)
} else {
  output.net$variance_net <- c(env_exp - inter_exp,
                               spatial_net,
                               resid_net + inter_exp,
                               env_exp - inter_exp + spatial_net)
}

# Detailed model: pure effects of the non land-use terms (and "interactions"),
# the land-use row repeated five times, then the selected PCNM rows.
# The first table contributes columns 4 and 6, the PCNM table columns 5 and 7.
rownam <- !(rownames(output.mod.pur) %in% c("code_occup", "Residual"))
occup_row <- output.mod.pur[rownames(output.mod.pur) %in% "code_occup", c(4, 6)]
output.net.pur <- rbind(output.mod.pur[rownam, c(4, 6)],
                        occup_row, occup_row, occup_row, occup_row, occup_row,
                        pcnm.fwdsel$anova[-nrow(pcnm.fwdsel$anova), c(5, 7)])

# Land-use coefficients are located by pattern on the coefficient names. They
# used to be matched against the hard-coded names assigned to CO.coeff, which
# are not necessarily the names carried by `coeff`.
is_occup_coeff <- grepl("^code_occup", rownames(coeff))
CO.coeff <- as.data.frame(coeff[is_occup_coeff, ])
rownames(CO.coeff) <- c("code_occupCO1", "code_occupCO2", "code_occupCO3", "code_occupCO4")
colnames(CO.coeff) <- c("coeff")

# Layout: other coefficients, two rows without coefficient ("NA" strings),
# land-use coefficients, PCNM coefficients.
output.net.pur$coeff <- c(coeff$coeff.norm[!is_occup_coeff],
                          "NA",
                          "NA",
                          coeff$coeff.norm[is_occup_coeff], coeff.pcnm$RDA1)
# The column is character because of the "NA" strings: compare numerically,
# not as text. Rows without a coefficient get an NA direction.
coeff_num <- suppressWarnings(as.numeric(output.net.pur$coeff))
output.net.pur$dir <- ifelse(coeff_num < 0, "negative", "positive")

#### !!!!!!!! If PCNM aborted 
#### Addition to first analysis
# model global
#output.net<-output.mod

# model détailled
#output.net.pur<-rbind(output.mod.pur[-nrow(output.mod.pur),c(4,6)])
#output.net.pur$coeff<-c(coeff$coeff.norm, "NA")
#output.net.pur$dir<-ifelse(output.net.pur$coeff <0, "negative", "positive")

#rownam<-!(rownames(output.mod.pur)%in% c("code_occup", "Residual"))
#output.net.pur<-rbind(output.mod.pur[c(rownam),c(4,6)], 
#                      output.mod.pur[rownames(output.mod.pur)%in%"code_occup",c(4,6)],
#                      output.mod.pur[rownames(output.mod.pur)%in%"code_occup",c(4,6)],
#                      output.mod.pur[rownames(output.mod.pur)%in%"code_occup",c(4,6)],
#                      output.mod.pur[rownames(output.mod.pur)%in%"code_occup",c(4,6)],
#                      output.mod.pur[rownames(output.mod.pur)%in%"code_occup",c(4,6)])

#CO.coeff<-as.data.frame(coeff[grep("^code_occup", rownames(coeff)),])
#rownames(CO.coeff)<-c("code_occupCO1", "code_occupCO2", "code_occupCO3", "code_occupCO4")
#colnames(CO.coeff)<-c("coeff")
#output.net.pur$coeff<-c(coeff$coeff.norm[!rownames(coeff) %in%  rownames(CO.coeff)], 
#                        "NA", 
#                        "NA", 
#                        coeff$coeff.norm[rownames(coeff) %in%  rownames(CO.coeff)])
#output.net.pur$dir<-ifelse(output.net.pur$coeff <0, "negative", "positive")

##################### STEP 6: Summarising the effects by group and by individual variable ##################### 
#### Processes
# Each group total is the sum of the variance_exp values of selected rows of
# output.net.pur, looked up by row name. Terms are switched on/off by
# (un)commenting them.
# NOTE(review): a lookup on a row name that is absent returns a zero-length
# vector, which makes the whole sum zero-length - make sure every active
# variable is present in output.net.pur.
# Climate: fixed to 0 here (the Tem_mean lookup is commented out).
climat<-c(0) #as.numeric(output.net.pur$variance_exp[rownames(output.net.pur)=="Tem_mean"])
# Soil physico-chemistry: pH + Silt + P_ass.
pedologie<-as.numeric(output.net.pur$variance_exp[rownames(output.net.pur)=="pH"]) + 
  #as.numeric(output.net.pur$variance_exp[rownames(output.net.pur)=="Clay"]) + 
  as.numeric(output.net.pur$variance_exp[rownames(output.net.pur)=="Silt"]) +
  #as.numeric(output.net.pur$variance_exp[rownames(output.net.pur)=="K_tot"]) +
  as.numeric(output.net.pur$variance_exp[rownames(output.net.pur)=="P_ass"]) 
  #as.numeric(output.net.pur$variance_exp[rownames(output.net.pur)=="C_N"]) 
  #as.numeric(output.net.pur$variance_exp[rownames(output.net.pur)=="Organic_Carbon"]) 
# Land use: the "code_occup" row.
usage<-as.numeric(output.net.pur$variance_exp[rownames(output.net.pur)=="code_occup"])
# Spatial: the "y_reel" row plus the second entry of output.net$variance_net.
# NOTE(review): that entry is taken by position, presumably the PCNM fraction
# of the global model - confirm the row order of output.net.
spatial<-as.numeric(output.net.pur$variance_exp[rownames(output.net.pur)=="y_reel"]) + 
  #as.numeric(output.net.pur$variance_exp[rownames(output.net.pur)=="x_reel"]) + 
  #as.numeric(output.net.pur$variance_exp[rownames(output.net.pur)=="Elevation"]) +
  as.numeric(output.net$variance_net[2])
# Interactions: the "interactions" row, replaced by 0 when it is not positive.
interaction<-ifelse(as.numeric(output.net.pur$variance_exp[rownames(output.net.pur)=="interactions"])>0, 
                    as.numeric(output.net.pur$variance_exp[rownames(output.net.pur)=="interactions"]), 
                    0 )
# Residuals: the "Residual" row of the global model table.
residus<-as.numeric(output.net$variance_net[rownames(output.net)=="Residual"]) 
# One row per group: a label (type) and its total (variance_exp).
# cbind() of a character vector with a numeric matrix yields a character
# matrix, so variance_exp is stored as text in this data frame.
covariables<-as.data.frame(cbind(c("Physico-chimie du sol", "Mode d'usage du sol", "Descripteurs géographiques", "Climat", "Interactions", "Résidus"), 
                                 (rbind(pedologie, usage, spatial, climat, interaction, residus))))
colnames(covariables)<-c("type", "variance_exp")

# Per-variable table without the "code_occup" and "interactions" rows,
# sorted by direction of effect (column dir).
output.net.pur2<-output.net.pur[!rownames(output.net.pur)%in%c("code_occup", "interactions"),]
output.net.pur2<-output.net.pur2[order(output.net.pur2$dir),]

#### Pure effects
# Append to `dat` one summary row holding the summed variance_exp of all PCNM
# descriptors (rows whose name starts with "X").
#   dat:       four-column subset of output.net.pur2 for one direction.
#   direction: "positive" or "negative", stored in the 4th column of the new row.
# Returns `dat` with the extra row; columns 1 and 3 of that row are "<NA>".
AddPcnmTotal <- function(dat, direction) {
  pcnm.total <- sum(as.numeric(dat$variance_exp[grep("^X", rownames(dat))]))
  dat[nrow(dat) + 1, ] <- c("<NA>", pcnm.total, "<NA>", direction)
  dat
}

# Build the table of pure effects for one direction.
#   dat:           output of AddPcnmTotal(); the non-PCNM rows are expected
#                  first, then the PCNM rows, then the summary row.
#   spatial.label: legend used for rows that are not listed in nomenc$var.names.
# Returns columns 2 and 4 of the non-PCNM rows and of the summary row, with
# variance_exp rounded to 2 digits, plus the legend (mot) and colour (col)
# looked up in `nomenc`.
PureEffectsTable <- function(dat, spatial.label) {
  n.pcnm <- length(grep("^X", rownames(dat)))
  n.env <- nrow(dat) - (n.pcnm + 1)
  # seq_len() instead of 1:n.env: when there is no non-PCNM row, 1:0 would
  # select row 1 a second time and duplicate the summary row.
  pur <- dat[c(seq_len(n.env), nrow(dat)), c(2, 4)]
  pur$variance_exp <- round(as.numeric(pur$variance_exp), digits = 2)
  known <- match(rownames(pur), nomenc$var.names)
  pur$mot <- as.character(nomenc$legends[known])
  pur$col <- as.character(nomenc$col.leg[known])
  pur$mot[is.na(known)] <- spatial.label
  pur$col[is.na(known)] <- "yellow1"
  pur
}

# Variables with a positive coefficient
datp<-AddPcnmTotal(output.net.pur2[output.net.pur2$dir=="positive",], "positive")
dim(datp) # interactive sanity check
datp.pur<-PureEffectsTable(datp, "Descripteurs spatiaux1")

# Variables with a negative coefficient
datn<-AddPcnmTotal(output.net.pur2[output.net.pur2$dir=="negative",], "negative")
dim(datn) # interactive sanity check
datn.pur<-PureEffectsTable(datn, "Descripteurs spatiaux2")

# Stack both directions; `hist` is the signed bar height (negative effects
# are drawn below zero). The factor levels follow the sorted row order so the
# plot keeps that order.
dat.pur<-rbind(datp.pur, datn.pur)
dat.pur$hist<-ifelse(dat.pur$dir=="negative", as.numeric(dat.pur$variance_exp)*(-1), dat.pur$variance_exp)
dat.pur<-dat.pur[order(dat.pur$hist),]
dat.pur$mot<-factor(dat.pur$mot, levels=dat.pur$mot)

#### Land-use modes
# Display name and bar colour of each land-use mode
nom.usage<-c("Cultures", "Prairies", "Forêts", "Vignes/Vergers")
col.usage<-c("orange", "green", "seagreen4", "purple")
# Coefficients of the rows of `coeff` named "code_occup...", rounded to 4 digits
coeff.usage<-round(as.numeric(coeff[grepl("^code_occup", rownames(coeff)),]), digits=4)
# cbind() names the columns after the three vectors: nom.usage, coeff.usage, col.usage
coeff.CO<-as.data.frame(cbind(nom.usage, coeff.usage, col.usage))

##################### STEP 7: Saving the results ##################### 
# Every table goes to "<liste_chemins>/<taxon>.<suffix>" through write.csv().
invisible(local({
  # Full path of the output file carrying the given suffix
  out.file <- function(suffix) {
    paste(liste_chemins, paste(taxon, suffix, sep="."), sep="/")
  }
  write.csv(colnames(env.var.exp2), file=out.file("Variables.after regsubsets.csv"))
  write.csv(output.mod,             file=out.file("PV_env.model.csv"))
  write.csv(output.mod.pur,         file=out.file("PV_env.pur.csv"))
  write.csv(coeff,                  file=out.file("PV_env.coeff.csv"))
  write.csv(output.net,             file=out.file("PV_env_pcnm.model.csv"))
  write.csv(output.net.pur,         file=out.file("PV_env_pcnm.pur.csv"))
  write.csv(covariables,            file=out.file("PV_env_pcnm.grp.csv"))
  write.csv(dat.pur,                file=out.file("PV_env_pcnm.purF.csv"))
  write.csv(coeff.CO,               file=out.file("PV_env_pcnm.coeffCO.csv"))
}))

##################### STEP 8: Plotting ##################### 

# Save a plot as PDF, EPS and PNG.
#
# Args:
#   gplot:    plot object; handed to ggsave() for the PDF and print()ed on
#             the EPS and PNG devices.
#   filename: output path without extension; ".pdf", ".eps" and "_.png" are
#             appended.
#   width, height: passed unchanged to ggsave() and cairo_ps(), and
#             multiplied by 100 for png().
#             Notice that A4: width=11.69, height=8.27
# Returns:
#   gplot, invisibly.
ExportPlot <- function(gplot, filename, width=2, height=1.5) {
  # Draw the plot on the device that was just opened and always close that
  # device, even when rendering fails, so no graphics device is left open.
  print_and_close <- function() {
    on.exit(dev.off(), add = TRUE)
    print(gplot)
  }

  # ggsave() opens and closes its own device: no print()/dev.off() is needed
  # afterwards (an extra pair would draw on, then close, whichever device
  # happens to be current).
  ggsave(paste0(filename, '.pdf'), gplot, width = width, height = height)

  cairo_ps(filename = paste0(filename, '.eps'), width = width, height = height)
  print_and_close()

  png(filename = paste0(filename, '_.png'), width = width * 100, height = height * 100)
  print_and_close()

  invisible(gplot)
}

# Plot by group
# Read back the group table saved in step 7 (comma-separated, "." as decimal
# mark, first column used as row names) and sort it by group label.
covariables<-read.csv2(paste(liste_chemins, paste(taxon, "PV_env_pcnm.grp.csv", sep="."), sep="/"), dec=".", sep=",", row.names=1)
covariables<-covariables[order(covariables$type),]

# Polar bar chart of the first five groups; the sixth row after sorting is
# dropped.
# NOTE(review): the dropped row is presumably "Résidus", which relies on the
# sort order of the labels - confirm.
g<-ggplot(data=covariables[-6,], 
          aes(x = type, 
              y = variance_exp, 
              fill=type, 
              label=paste(type, "\n", "(",round(variance_exp, digits=1), "%)", sep=""))) + # label=function (x) str_wrap(x,width=10)
  coord_polar()  +
  scale_y_continuous() +
  # Fill and outline colours are fixed vectors of five values, one per bar
  # in row order; they override the fill=type mapping above.
  geom_bar(width = 1, 
           alpha=0.5, 
           stat="identity", 
           fill=c("blue1", "yellow1", "red1", "green1", "orange1"), 
           color=c("blue4", "yellow4", "red4", "green4", "orange4")) + 
  # Labels are placed at hard-coded x positions, at two thirds of the largest
  # plotted value.
  # NOTE(review): x = 36, 72, ... looks like angles in degrees, while the x
  # axis is the discrete `type` - check the label placement on the output.
  geom_text( x=c(36, 72, 108, 144, 180), 
             y=2/3* max(covariables[-6,]$variance_exp), 
             size=8) +
  theme_minimal() +
  theme(plot.margin= unit(c(1,3,1,3), "cm")) +
  # Hide axis texts, titles and ticks
  theme(axis.text.x=element_blank(), # (size=15) 
        axis.text.y=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank()) +
  theme(axis.ticks=element_blank()) +
  guides(colour = guide_legend(title.hjust = 15)) 

# Export as "<taxon>.PV_plot_grp" (.pdf/.eps/_.png) at A4 landscape size
ExportPlot(gplot=g, filename=paste(liste_chemins, paste(taxon, "PV_plot_grp", sep="."), sep="/"), width=11.69, height=8.27 )

# Plot the qualitative variable (land-use mode)
# Read back the land-use coefficients saved in step 7 and sort them by value.
coeff.CO<-read.csv2(paste(liste_chemins, paste(taxon, "PV_env_pcnm.coeffCO.csv", sep="."), sep="/"), dec=".", sep=",", row.names=1)
coeff.CO<-coeff.CO[order(coeff.CO$coeff.usage),]

# Horizontal bar chart: one bar per land-use mode, of length coeff.usage*10,
# labelled with the mode name at mid-bar and kept in the sorted row order.
# The fill is the colour name stored in col.usage. scale_fill_identity() uses
# that name as the colour itself; an unnamed scale_fill_manual(values=...)
# would assign the values to the alphabetically sorted colour names and could
# give a bar the colour of another land-use mode.
# Columns are referenced by name inside aes() rather than through coeff.CO$.
c<-ggplot(data=coeff.CO, aes(x = nom.usage, y = coeff.usage*10)) + 
  geom_bar(stat="identity", aes(fill=as.character(col.usage))) + 
  geom_text(aes(label=nom.usage, y=0.5*coeff.usage*10), 
            size=9) + 
  theme(axis.title.y=element_blank(), 
        axis.title.x=element_blank(), 
        axis.text.x=element_blank(), 
        axis.text.y=element_blank(), 
        legend.position="none", 
        panel.background = element_blank()) +
  scale_y_continuous() +
  scale_x_discrete(limits=coeff.CO$nom.usage) +
  scale_fill_identity() + 
  coord_flip()

ExportPlot(gplot=c, filename=paste(liste_chemins, paste(taxon, "PV.effets.code_occup.barplot", sep="."), sep="/"), width=11.69, height=8.27 )


# Plot by variables
# Read back the pure-effects table saved in step 7.
dat.pur<-read.csv2(paste(liste_chemins, paste(taxon, "PV_env_pcnm.purF.csv", sep="."), sep="/"), dec=".", sep=",", row.names=1)
# Drop the land-use rows (row names starting with "code_occup"). A logical
# mask is used because dat.pur[-grep(...), ] returns zero rows when nothing
# matches.
dat.purwCO<-dat.pur[!grepl("^code_occup", rownames(dat.pur)),]
dat.purwCO<-dat.purwCO[order(dat.purwCO$hist),]
# Factor levels follow the sorted row order, so bars are drawn in that order
# and the colours below line up with the levels.
dat.purwCO$mot<-factor(dat.purwCO$mot, levels=dat.purwCO$mot)
# dat.purwCO<-dat.purwCO[dat.purwCO$variance_exp!=0,]

# Horizontal bar chart of the signed pure effects (hist), one bar per
# variable, each labelled at mid-bar with its absolute value in percent.
# Columns are referenced by name inside aes() rather than through dat.purwCO$.
s<-ggplot(data=dat.purwCO, aes(x = mot, y = hist)) + #, color=c("blue", "green", "yellow", "orange", "red", "purple")) + #, fill = type
  geom_bar(stat="identity",aes(fill=mot)) + #c("blue", "green", "yellow", "orange", "red")) + #width = 1, alpha=0.5,
  geom_text(aes(label=paste(abs(hist), "%"), y=0.5*hist), 
            size=7) + 
  theme(axis.title.y=element_blank(), 
        axis.title.x=element_blank(), 
        axis.text.x=element_blank(), 
        axis.text.y=element_text(size=25), 
        legend.position="none",
        panel.background = element_blank()) +
  scale_y_continuous() +
  scale_fill_manual(values=as.character(dat.purwCO$col)) + 
  coord_flip()

ExportPlot(gplot=s, filename=paste(liste_chemins, paste(taxon, "PV.effetspurs.barplot", sep="."), sep="/"), width=11.69, height=8.27 )



#### To resume a previous run: reload the input data and the saved result tables ---------
# NOTE(review): the working directory and the results directory are
# hard-coded absolute paths of one machine - adapt them before running.
setwd('/Users/Battle/Documents/5- Post-doc/Biocom/4-RMQS/Data/Data used/')
ppm<-read.csv2("Taxonomy_phylum-classe.metropol.csv", header=TRUE, sep=",", row.names=1)
covmetro<-read.csv2("covariable RMQS.metropol.csv", header=TRUE, dec=".",  row.names=1)
env<-as.data.frame(covmetro) # env is the generic name of environmental variables within the script
bio<-ppm # bio is the generic name of the biological (response) variables table within the script
# The studied variable is the 11th column of bio
taxon<-colnames(bio)[11]
bio$taxon<-bio[,11]
CHEMIN<-'/Users/Battle/Documents/5- Post-doc/Biocom/4-RMQS/Partition de Variance/Resultats'
liste_chemins<-path.expand(paste(CHEMIN,taxon, sep="/")) ## Folder containing the analysis results for this taxon
# Result tables written by write.csv() in step 7: comma-separated, "." as
# decimal mark, first column used as row names.
covariables<-read.csv2(paste(liste_chemins, paste(taxon, "PV_env_pcnm.grp.csv", sep="."), sep="/"), dec=".", sep=",", row.names=1)
dat.pur<-read.csv2(paste(liste_chemins, paste(taxon, "PV_env_pcnm.purF.csv", sep="."), sep="/"), dec=".", sep=",", row.names=1)

